library(pacman)
p_load(stringr, tidyverse, magrittr, ggpubr, ggsignif)
p_load(fields, fda, oro.nifti, spam, Hmisc, knitr, rqPen, quantreg, scam) Check your parameter setting here:
knot_space <- 12
pve_threshold <- 0.8
set.seed(657)
tau_lev <- c(0.05, 0.5, 0.95)These are the functions needed to build the tensor product of basis functions.
resize_image <- function(mask, img = mask){
mask <- drop(mask)
img <- drop(img)
dims <- dim(mask)
if(!all.equal(dim(img), dims)) stop("Mask and img must have the same dimensions!")
nonzero_coord <- matrix(NA, nrow = length(dims), ncol = 2)
rownames(nonzero_coord) <- paste0("Dim", 1:length(dims))
colnames(nonzero_coord) <- c("first","last")
for(i in 1:length(dims)) nonzero_coord[i,] <- range(which(apply(mask,i,sum) != 0))
###select first and last coordinate for which the sum is non zero
resized_array <- (img[,,]*(mask[,,]>0))[nonzero_coord[1,"first"]:nonzero_coord[1,"last"],
nonzero_coord[2,"first"]:nonzero_coord[2,"last"],
nonzero_coord[3,"first"]:nonzero_coord[3,"last"]]
return(list(array = resized_array, original_coord = nonzero_coord))
}
calc_projection_Bspl <- function(knot_space, mask_fname){
start_time <- Sys.time()
mask <- readNIfTI(fname = mask_fname) %>%
magrittr::is_greater_than(0.5)*1 ### to avoid fuzzy boundaries
mask_subset <- resize_image(mask)
dims_mask <- dim(mask_subset$array)
voxel_grid_nonzero_mask<- which(as.vector(mask_subset$array) != 0)
basismat_dim1 <- bsplineS(x = 1:dims_mask[1], breaks = c(seq(1,dims_mask[1], by = knot_space), dims_mask[1]), norder=3, nderiv=0, returnMatrix=TRUE)
basismat_dim2 <- bsplineS(x = 1:dims_mask[2], breaks = c(seq(1,dims_mask[2], by = knot_space), dims_mask[2]), norder=3, nderiv=0, returnMatrix=TRUE)
basismat_dim3 <- bsplineS(x = 1:dims_mask[3], breaks = c(seq(1,dims_mask[3], by = knot_space), dims_mask[3]), norder=3, nderiv=0, returnMatrix=TRUE)
design_mat <- spam::kronecker(basismat_dim3, basismat_dim2, make.dimnames = TRUE) %>%
spam::kronecker(., basismat_dim1, make.dimnames = TRUE) %>%
.[voxel_grid_nonzero_mask, ] %>%
.[, colSums(.) != 0]
#design_mat@x[design_mat@x<0.001] <- 0
#design_mat <- design_mat[, colSums(design_mat) != 0]
cat("The number of basis functions is ", ncol(design_mat),".\n", sep = "")
return(list("basis_mat" = design_mat,
"voxel_grid_nonzero_mask" = voxel_grid_nonzero_mask,
"dims_mask" = dims_mask,
"mask_subset" = mask_subset))
}
slices.plot <- function(image.vec, dims = dims_mask,
voxels = results$voxel_grid_nonzero_mask,
col.threshold = 0,
legend.range = range(image.vec), ...){
img <- rep(NA, prod(dims_mask))
img[voxels] <- image.vec
ybr <- seq(legend.range[1], legend.range[2], length.out = 101)
rc1 <- colorRampPalette(colors = c("blue", "white"), space="Lab")(length(which(ybr < col.threshold)))
rc2 <- colorRampPalette(colors = c("white", "red"), space="Lab")(length(which(ybr > col.threshold))-1)
rampcols <- c(rc1, rc2)
if(col.threshold == 0){
leg.text <- c(format(min(ybr), scientific = TRUE, digits = 3),
col.threshold, format(max(ybr), scientific = TRUE, digits = 3))
} else{
leg.text <- c(floor(min(ybr)), col.threshold, ceiling(max(ybr)))
}
overlay(x = nifti(resize_image(mask, img_template)$array),
y = nifti(array(img, dim = dims_mask)),
plot.type="single", z = seq(16, 136, by = 5),
col.y = scales::alpha(rampcols, 0.45), useRaster = TRUE, oma = c(4,0,0,0))
fields::image.plot(legend.only=TRUE, zlim= range(ybr),
col = rampcols, horizontal = TRUE,
legend.mar = 1.5, legend.cex=0.5, legend.width = 0.8,
legend.args = list(text = leg.text,
col="white", cex=0.7, side = 1,
at = c(min(ybr),col.threshold, max(ybr))))
#par(xpd = TRUE)
#lab_img <- 34 + seq(16, 136, by = 5)
#annot_grid <- expand.grid(0.025 + seq(0, 0.95, by = 0.2), seq(0.97, 0.1, by = -0.175))
#library(grid)
#grid.text(lab_img, x=annot_grid[,1], y=annot_grid[,2],gp=gpar(fontsize=10, cex = 0.9, col = "white"))
}
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
forest.rq.plot <- function(pred.interval, mycol){
data_forest_center <- pred.interval$data_forest_center
mycol <- mycol[as.numeric(data_forest_center$Dx)]
excesspoints <- pred.interval$excesspoints
pal <- c("#009E73", "gold3", "#D55E00")
forest.rq <- ggplot(data_forest_center,
aes(y=id, x = AgePredMed, xmin = AgePredLower,
xmax=AgePredUpper, colour = Dx))+
geom_point(cex=0.5)+
scale_colour_manual(values=pal) +
ggExtra::removeGridY() +
theme(axis.ticks.y = element_blank(), axis.text.y = element_blank(),
axis.line.y = element_line(linetype = "blank"),
strip.text = element_text(size = 12))+
geom_errorbarh(height = 0.01) +
geom_vline(xintercept = 0, linetype = "dashed") +
theme(legend.position = "none",
strip.text.x = element_text(size = 12)) +
geom_point(data = excesspoints, mapping = aes_string(x = "x",y = "id"),
inherit.aes = F, size = 1, shape = 18) +
facet_wrap(. ~ Dx, scales = "free_y", shrink = TRUE) +
xlab('Difference from chronological age') +
ylab('Subjects')
print(forest.rq)
}
# fqr_prediction <- function(train_sub, test_sub, pred_table, qr_pen = "MCP", return_func_coef = FALSE){ #######
#
# data_projected_train <- data_projected[train_sub,]
# data_projected_test <- data_projected[test_sub,]
#
# coef_data <- data_projected_train %>% scale(scale = F)
# FPCA_mat <- Matrix::tcrossprod(coef_data, basis_W_half)/sqrt(nrow(coef_data)-1)
#
# tmpSVD <- svd(FPCA_mat, nu = 0) ###, nv = 50
# values <- tmpSVD$d^2
# #plot(values, ylim = c(0, max(values)+100), main = "Screeplot", type = "b")
#
# ncomp <- which(cumsum(values)/sum(values) >= pve_threshold)[1]
# vectors <- Matrix::solve(basis_W_half, tmpSVD$v[, 1:ncomp])
#
# scores_mat <- data_projected %>%
# data.matrix(., rownames.force = TRUE) %>%
# sweep(., 2, colMeans(data_projected_train)) %>%
# magrittr::multiply_by_matrix(.,int_mat) %*% vectors %>%
# as.matrix()
#
# regr_data <- scores_mat %>%
# as.data.frame(.) %>%
# tibble::rownames_to_column(., var = "PTID") %>%
# right_join(data_ADNI_bl_817, ., by = "PTID") %>%
# select(PTID, AGE, starts_with('V', ignore.case = FALSE)) %>%
# tibble::column_to_rownames(., "PTID")
#
# set.seed(657)
# model_lower <- rqPen::cv.rq.pen(x = as.matrix(regr_data[train_sub, -1]),
# y = regr_data[train_sub, ]$AGE,
# tau = tau_lev[1],
# method = "fn",
# penalty = qr_pen)
# model_med <- rqPen::cv.rq.pen(x = as.matrix(regr_data[train_sub, -1]),
# y = regr_data[train_sub, ]$AGE,
# tau = tau_lev[2],
# method = "fn",
# penalty = qr_pen)
# model_upper <- rqPen::cv.rq.pen(x = as.matrix(regr_data[train_sub, -1]),
# y = regr_data[train_sub,]$AGE,
# tau = tau_lev[3],
# method = "fn",
# penalty = qr_pen)
#
# pred_table[test_sub, 1] <- model_lower %>%
# predict(newx = as.matrix(regr_data[test_sub,-1]))
# pred_table[test_sub, 2] <- model_med %>%
# predict(newx = as.matrix(regr_data[test_sub,-1]))
# pred_table[test_sub, 3] <- model_upper %>%
# predict(newx = as.matrix(regr_data[test_sub,-1]))
#
# if(return_func_coef == FALSE) return(pred_table)
# else return(list(pred_table = pred_table,
# no_fpc = ncomp,
# evec = vectors,
# model_med_coef = with(model_med,
# models[[which(cv[,1] == lambda.min)]]$coefficients[-1]) %>% as(., "sparseMatrix") ))
# } fqr_prediction <- function(train_sub,
test_sub,
data_demo = data_ADNI_bl_817,
pred_table,
qr_pen = "LASSO",
qr_postLASSO = FALSE,
lambda = NULL,
tau_levels = tau_lev,
return_func_coef = FALSE){
data_projected_train <- data_projected[train_sub,]
data_projected_test <- data_projected[test_sub,]
coef_data <- data_projected_train %>% scale(scale = F)
FPCA_mat <- Matrix::tcrossprod(coef_data, basis_W_half) / sqrt(nrow(coef_data) - 1)
tmpSVD <- svd(FPCA_mat, nu = 0) ###, nv = 50
values <- tmpSVD$d ^ 2
ncomp <- which(cumsum(values) / sum(values) >= pve_threshold)[1]
vectors <- Matrix::solve(basis_W_half, tmpSVD$v[, 1:ncomp])
scores_mat <- data_projected %>%
data.matrix(., rownames.force = TRUE) %>%
sweep(., 2, colMeans(data_projected_train)) %>%
magrittr::multiply_by_matrix(., int_mat) %*% vectors %>%
as.matrix()
regr_data <- scores_mat %>%
as.data.frame(.) %>%
tibble::rownames_to_column(., var = "PTID") %>%
right_join(data_demo, ., by = "PTID") %>%
select(PTID, AGE, starts_with('V', ignore.case = FALSE)) %>%
tibble::column_to_rownames(., "PTID")
set.seed(657)
model_lower <- rqPen::cv.rq.pen(x = as.matrix(regr_data[train_sub, -1]),
y = regr_data[train_sub, ]$AGE,
tau = tau_levels[1],
method = "br",
penalty = qr_pen)
model_med <- rqPen::cv.rq.pen(x = as.matrix(regr_data[train_sub, -1]),
y = regr_data[train_sub, ]$AGE,
tau = tau_levels[2],
method = "br",
penalty = qr_pen)
model_upper <- rqPen::cv.rq.pen(x = as.matrix(regr_data[train_sub, -1]),
y = regr_data[train_sub,]$AGE,
tau = tau_levels[3],
#lambda = lambda,
method = "br",
penalty = qr_pen)
model_lower_coef <- with(model_lower, models[[which(cv[, 1] == lambda.min)]]$coefficients[-1])
model_med_coef <- with(model_med, models[[which(cv[, 1] == lambda.min)]]$coefficients[-1])
model_upper_coef <- with(model_upper, models[[which(cv[, 1] == lambda.min)]]$coefficients[-1])
lambda_min_med <- model_med$lambda.min
if (return_func_coef == TRUE)
print(cv_plots(model_med, logLambda = TRUE, loi = NULL))
if (qr_postLASSO == TRUE) {
model_lower <- model_lower_coef %>%
subset(., !not(.)) %>%
when(length(names(.)) < 1 ~ 1, ~ names(.)) %>%
paste(., collapse = "+") %>%
paste("AGE ~ ", .) %>%
formula(.) %>%
rq(., tau = tau_lev[1], data = regr_data[train_sub,])
model_med <- model_med_coef %>%
subset(., !not(.)) %>%
when(length(names(.)) < 1 ~ 1, ~ names(.)) %>%
paste(., collapse = "+") %>%
paste("AGE ~ ", .) %>%
formula(.) %>%
rq(., tau = tau_lev[2], data = regr_data[train_sub,])
model_upper <- model_upper_coef %>%
subset(., !not(.)) %>%
when(length(names(.)) < 1 ~ 1, ~ names(.)) %>%
paste(., collapse = "+") %>%
paste("AGE ~ ", .) %>%
formula(.) %>%
rq(., tau = tau_lev[3], data = regr_data[train_sub,])
pred_table[test_sub, 1] <- model_lower %>%
predict(newdata = regr_data[test_sub, ])
pred_table[test_sub, 2] <- model_med %>%
predict(newdata = regr_data[test_sub, ])
pred_table[test_sub, 3] <- model_upper %>%
predict(newdata = regr_data[test_sub, ])
} else {
pred_table[test_sub, 1] <- model_lower %>%
predict(newx = as.matrix(regr_data[test_sub,-1]))
pred_table[test_sub, 2] <- model_med %>%
predict(newx = as.matrix(regr_data[test_sub,-1]))
pred_table[test_sub, 3] <- model_upper %>%
predict(newx = as.matrix(regr_data[test_sub,-1]))
}
if (return_func_coef == FALSE)
return(pred_table)
else
return(list(pred_table = pred_table,
no_fpc = ncomp,
evec = vectors,
model_med_coef = as(model_med_coef, "sparseMatrix"),
model_lower_coef = as(model_lower_coef, "sparseMatrix"),
model_upper_coef = as(model_upper_coef, "sparseMatrix"),
lambda_min = lambda_min_med,
no_comp = ncomp
)
)
} Here the calculation.
system.time(
calc_projection_Bspl(knot_space, mask_fname = "smoothed_mask_125.nii.gz") %>%
list2env(., envir = .GlobalEnv)
)## The number of basis functions is 2694.
## user system elapsed
## 20.50 13.91 36.06
#save.image(file = "basis_functions.RData")Now go on the cluster where you store your images.
### Generate image list
ls all_images_folder | wc -l
rm imagelist.txt
ls -rt path/to/images/* > imagelist.txt
### Import basis_functions.RData
pscp C:\Users\Asus\Desktop\IM-AGEING\basis_functions.RData u1560346@buster.stats.warwick.ac.uk:/storage/u1560346
### Run the projection script
qsub project_TBM_fast.R --ws "basis_functions.RData"
### Concatenate all the single-image predictions
cat ./TBM_projections_fast/* > data_projected.dat
#if COLUMNWISE paste PAC_projections/* -d '\t' > filemerged.txt
### Total elapsed time
qacct -j <jobID> -t <task_list> | grep -e start_time -e end_time | sort -k5 | awk 'NR==1; END{print}' | awk 'BEGIN { ORS = "\t" } {print $5}' | awk '{print $1 "\t" $2 "\t" "knot_spacing"}' >> runtimeTBM.txt
### Download all-images projections
pscp u1560346@buster.stats.warwick.ac.uk:/storage/u1560346/data_projected.dat C:\Users\Asus\Desktop\IM-AGEING#load("basis_functions.RData")
data_projected <- paste0("data_projected_", knot_space, "mm.dat") %>%
data.table::fread(.) %>%
data.frame(., row.names = 1) %>%
as.matrix()
start_time <- Sys.time()
int_mat <- Matrix::crossprod(basis_mat) ###matrix W
basis_W_half <- try(Matrix::chol(int_mat))
if("try-error" %in% class(basis_W_half)) basis_W_half <- Matrix::chol(int_mat, pivot = TRUE)
end_time <- Sys.time()You can check the condition number with #system.time(condnum <- Matrix::rcond(int_mat)) but it may take a few minutes.
mask <- readNIfTI(fname = "smoothed_mask_125.nii.gz")
fn = "ADNI_ICBM9P_mni_4step_MDT.img"
dimscan <- c(220,220,220)
img_template <- array(readBin(fn, what = "int", n = prod(dimscan), size = 2, signed = FALSE), dim = dimscan)
#ortho2(as.nifti(resize.image(mask, img_template)$array), add.orient = FALSE,
# text = "Template with smoothed mask", text.cex = 1.25)allvoxels_data <- basis_mat %*% data_projected["002_S_0413",] %>%
as.matrix(.) %>%
data.frame("voxel" = voxel_grid_nonzero_mask, "img" = .) %>%
left_join(data.frame("voxel" = 1:prod(dims_mask)), .) %>%
.[,2] %>%
array(., dim = dims_mask)## Joining, by = "voxel"
orig_img <- readBin("002_S_0413_sc_jacobian.img", what = "int", n = 220^3, size = 2, signed = FALSE) %>%
array(., dim = c(220,220,220)) %>%
resize_image(mask = mask, img = .) %>%
.$array %>%
nifti(.)
img_diff <- allvoxels_data - orig_img
hist_data <- data.frame(dat = c(as.vector(orig_img), as.vector(allvoxels_data)),
type = rep(c("Original", "Projected"),
each = length(as.vector(orig_img))))
hist_data %>%
drop_na %>%
filter(dat > 0) %>%
ggplot(., aes(dat, fill = type)) +
geom_density(alpha = 0.1) +
scale_fill_manual(values = c("yellow","darkblue"))slices.plot(img_diff[is.na(img_diff) == FALSE] , voxels = voxel_grid_nonzero_mask)load("ADNI_bl.RData") ###where data_ADNI_bl_817 is stored
pred_all <- data.frame(matrix(NA, nrow = nrow(data_ADNI_bl_817),ncol = 3),
select(data_ADNI_bl_817, Dx, AGE, PTID))
pred_all <- filter(pred_all,
AGE >= min(pred_all$AGE[pred_all$Dx == "N"], na.rm = TRUE) &
AGE <= max(pred_all$AGE[pred_all$Dx == "N"], na.rm = TRUE))
rownames(pred_all) <- pred_all$PTID
names(pred_all) <- c("AgePredLower", "AgePredMed", "AgePredUpper",
"Dx", "ChronoAge", "PTID")
pred_all$Dx <- ordered(pred_all$Dx, levels = c("N","MCI","AD"),
labels = c("Control", "MCI", "AD"))# ggplot(data = pred_all, aes(x = ChronoAge, fill = fct_rev(Dx))) +
# geom_histogram(position = "stack",
# bins = nclass.FD(pred_all$ChronoAge)) +
# labs(x = "Chronological age", y = "Frequency", fill = "Diagnosis") +
# scale_fill_manual(values = rev(c("#009E73", "gold3", "#D55E00")),
# guide = guide_legend(reverse = TRUE)) +
# theme_classic()
ggplot(data = pred_all, aes(x = ChronoAge,
fill = fct_rev(Dx),
color = fct_rev(Dx)#, lty = fct_rev(Dx)
)) +
stat_bin(geom="step", bins = nclass.FD(pred_all$ChronoAge),
direction="vh",
position = "identity", size = 1.2, alpha = 0.5, pad = TRUE
)+
labs(x = "Chronological age", y = "Frequency") +
scale_color_manual(name = "Diagnosis",
values = rev(c("#009E73", "gold3", "#D55E00")),
guide = guide_legend(reverse = TRUE,
override.aes = list(alpha = 1,
size = 0.9))) +
theme_classic() +
geom_hline(yintercept = 0, colour = "white", size = 1.5) +
expand_limits(y = 0) +
scale_y_continuous(expand = c(0, 0))# ggplot(data = pred_all, aes(x = ChronoAge, stat(count),
# #fill = fct_rev(Dx),
# color = fct_rev(Dx)#, lty = fct_rev(Dx)
# )) +
# geom_density(position = "identity", alpha = 0, adjust = 1/2) +
# labs(x = "Chronological age", y = "Frequency") +
# scale_color_manual(name = "Diagnosis",
# values = rev(c("#009E73", "gold3", "#D55E00")),
# guide = guide_legend(reverse = TRUE,
# override.aes = list(alpha = 1))) +
# scale_fill_manual(name = "Diagnosis",
# values = rev(c("#009E73", "gold3", "#D55E00")),
# guide = guide_legend(reverse = TRUE,
# override.aes = list(alpha = 1))) +
#theme_classic()
# ggplot(data = pred_all, aes(x = ChronoAge,
# fill = NULL,
# color = fct_rev(Dx)#, lty = fct_rev(Dx)
# )) +
# geom_histogram(bins = nclass.FD(pred_all$ChronoAge),
# position = "identity", size = 1.2, alpha = 0.33,
# fill="transparent"
# )+
# labs(x = "Chronological age", y = "Frequency") +
# scale_color_manual(name = "Diagnosis",
# values = rev(c("#009E73", "gold3", "#D55E00")),
# guide = guide_legend(reverse = TRUE,
# override.aes = list(alpha = 1,
# size = 0.9))) +
# theme_classic()
# ggplot(data = pred_all, aes(x = ChronoAge,
# fill = fct_rev(Dx)
# )) +
# geom_histogram(bins = nclass.FD(pred_all$ChronoAge),
# position = "identity", size = 1.2, alpha = 0.33
# )+
# labs(x = "Chronological age", y = "Frequency") +
# scale_fill_manual(name = "Diagnosis",
# values = rev(c("#009E73", "gold3", "#D55E00")),
# guide = guide_legend(reverse = TRUE,
# override.aes = list(alpha = 1,
# size = 0.9))) +
# theme_classic()
# qplot(ChronoAge, data = pred_all,
# fill = fct_rev(Dx),
# facets = Dx ~ ., bins = nclass.FD(pred_all$ChronoAge)) +
# labs(x = "Chronological age", y = "Frequency", fill = "Diagnosis") +
# scale_fill_manual(values = rev(c("#009E73", "gold3", "#D55E00")),
# guide = guide_legend(reverse = TRUE)) +
# theme_classic()kable(pred_all %>%
group_by(Dx) %>%
summarise(N = n(),
"Min." = min(ChronoAge),
"1st Qu." = quantile(ChronoAge, probs = 0.25),
"Median" = quantile(ChronoAge, probs = 0.5),
"Mean" = mean(ChronoAge),
"3rd Qu." = quantile(ChronoAge, probs = 0.75),
"Max." = max(ChronoAge))
)| Dx | N | Min. | 1st Qu. | Median | Mean | 3rd Qu. | Max. |
|---|---|---|---|---|---|---|---|
| Control | 229 | 59.9 | 72.300 | 75.60 | 75.86943 | 78.500 | 89.6 |
| MCI | 387 | 60.1 | 70.850 | 75.60 | 75.32636 | 80.400 | 89.3 |
| AD | 180 | 59.9 | 70.975 | 76.15 | 75.86444 | 81.575 | 89.1 |
start_pred <- Sys.time()
### Prediction on MCI and AD subjects
train_PTID <- pred_all$PTID[pred_all$Dx == "Control"]
test_PTID <- pred_all$PTID[pred_all$Dx != "Control"]
fqr_prediction(train_sub = train_PTID,
test_sub = test_PTID,
pred_table = pred_all, return_func_coef = TRUE,
data_demo = data_ADNI_bl_817,
tau_levels = tau_lev) %>%
list2env(., envir = .GlobalEnv)## NULL
## <environment: R_GlobalEnv>
pred_all <- pred_table
### Prediction on control subjects - cross-validation
k <- 10
pred_N <- filter(pred_all, Dx == "Control")
foldId <- cut(1:length(pred_N$PTID), breaks = k, labels=FALSE) %>%
sample()
table(foldId)## foldId
## 1 2 3 4 5 6 7 8 9 10
## 23 23 23 23 23 22 23 23 23 23
cbind(pred_N, foldId = foldId) %>%
group_by(foldId) %>%
summarise(AGEMEAN = mean(ChronoAge), MIN = min(ChronoAge), MAX = max(ChronoAge))## # A tibble: 10 x 4
## foldId AGEMEAN MIN MAX
## <int> <dbl> <dbl> <dbl>
## 1 1 77.7 70.1 89.6
## 2 2 76.1 71.6 84.4
## 3 3 75.4 70 84.8
## 4 4 73.5 59.9 84.2
## 5 5 77.1 63.2 84.8
## 6 6 75.4 65.1 88.6
## 7 7 74.6 65.4 84.7
## 8 8 76.9 70 87.6
## 9 9 76.1 70.7 86.2
## 10 10 75.9 62.8 86.9
for(i in seq_len(k)){
print(i)
train_PTID <- pred_N$PTID[which(foldId != i)]
test_PTID <- pred_N$PTID[which(foldId == i)]
pred_all <- fqr_prediction(train_sub = train_PTID,
test_sub = test_PTID,
pred_table = pred_all,
data_demo = data_ADNI_bl_817,
tau_levels = tau_lev)
}## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
end_pred <- Sys.time()for (eig in 1:3){
cat("Eigenimage", eig)
basis_mat %*% -evec[,eig] %>%
slices.plot(.@x, voxels = voxel_grid_nonzero_mask)
}## Eigenimage 1
## Eigenimage 2
## Eigenimage 3
###Prediction accuracy metrics###################################
nonmonotonic_rows <- which(with(pred_all, AgePredMed < AgePredLower | AgePredMed > AgePredUpper))
pred_all$OutPredInt <- with(pred_all, ChronoAge > AgePredUpper | ChronoAge < AgePredLower)
pred_all$OverPrediction <- with(pred_all, ChronoAge < AgePredLower)
pred_all$brainPAD <- with(pred_all, AgePredMed - ChronoAge)
p1 <- ggplot(pred_all, aes(x = AgePredMed, y = ChronoAge, color = fct_rev(Dx))) +
geom_point(alpha = 0.6) +
geom_smooth(aes(color = fct_rev(Dx), fill = fct_rev(Dx)),
alpha = 0.2, size = 1.6) +
#stat_smooth (geom="line", alpha=0.6, size=1.6) +
geom_abline(slope = 1, intercept = 0) +
theme_classic() +
labs(x = "Predicted age", y= "Chronological age") +
scale_color_manual(name = "Diagnosis",
values = rev(c("#009E73", "gold3", "#D55E00"))) +
scale_fill_manual(name = "Diagnosis",
values = rev(c("#009E73", "gold3", "#D55E00")),
guide = guide_legend(override.aes = list(alpha = 0.3))) +
guides(fill = guide_legend(reverse=TRUE), color = guide_legend(reverse=TRUE))
#rib_data <- ggplot_build(p1)$data[[2]]
#rib_data$Dx <- levels(pred_all$Dx) %>% rev() %>% rep(., each = 80)
#ggplot(data = pred_all, aes(x = AgePredMed, y = ChronoAge, color = fct_rev(Dx))) +
# geom_abline(slope = 1, intercept = 0) +
# geom_point(alpha = 0.5) +
# stat_smooth(alpha = 0, size = 2) +
# geom_line(data = rib_data, aes(x = x,y = ymax, color = factor(Dx)), inherit.aes = FALSE, linetype = 2, size = 1.2) +
# geom_line(data = rib_data, aes(x = x,y = ymin, color = factor(Dx)), inherit.aes = FALSE, linetype = 2, size = 1.2) +
# theme_classic() +
# labs(x = "Predicted age", y = "Chronological age") +
# scale_color_manual(name = "Diagnosis",
# values = rev(c("#009E73", "gold3", "#D55E00"))) #+
## geom_ribbon(data=rib_data,aes(x=x,ymin=ymin,ymax=ymax, color = factor(Dx)), fill=NA,linetype=1, inherit.aes = FALSE, show.legend = FALSE)
print(p1)## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(pred_all, aes(x = AgePredMed, y = brainPAD, color = fct_rev(Dx))) +
geom_point(alpha = 0.6) +
geom_smooth(aes(color = fct_rev(Dx), fill = fct_rev(Dx)),
alpha = 0.2, size = 1.6) +
geom_hline(yintercept = 0, lty = 2) +
theme_classic() +
labs(x = "Predicted age", y= "brainPAD") +
scale_color_manual(name = "Diagnosis",
values = rev(c("#009E73", "gold3", "#D55E00"))) +
scale_fill_manual(name = "Diagnosis",
values = rev(c("#009E73", "gold3", "#D55E00")),
guide = guide_legend(override.aes = list(alpha = 0.3))) +
guides(fill = guide_legend(reverse=TRUE), color = guide_legend(reverse=TRUE))## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
tab_results <- pred_all %>%
drop_na %>%
group_by(Dx) %>%
summarise(N = n(),
"Age (min)" = min(ChronoAge),
"Age (max)" = max(ChronoAge),
MAE=mean(abs(AgePredMed- ChronoAge)),
#weighted_MAE = mean(abs(AgePredMed- ChronoAge))/diff(range(ChronoAge)),
RMSE=sqrt(mean((AgePredMed- ChronoAge)^2)),
Cor = cor(AgePredMed,ChronoAge),
Cor_test = cor.test(AgePredMed,ChronoAge)$p.value,
Coverage = 1 - sum(OutPredInt)/n(),
OverPred = sum(OverPrediction)/n(),
Avg_Width = mean(abs(AgePredUpper - AgePredLower)))
hist_width <- pred_all %>%
drop_na %>%
group_by(Dx) %>%
summarise(Avg_Width = mean(abs(AgePredUpper - AgePredLower)))
kable(tab_results) ###, n = 1e3)| Dx | N | Age (min) | Age (max) | MAE | RMSE | Cor | Cor_test | Coverage | OverPred | Avg_Width |
|---|---|---|---|---|---|---|---|---|---|---|
| Control | 229 | 59.9 | 89.6 | 3.487621 | 4.429496 | 0.4779535 | 0e+00 | 0.8558952 | 0.0524017 | 13.59292 |
| MCI | 387 | 60.1 | 89.3 | 4.994269 | 6.118134 | 0.4614530 | 0e+00 | 0.6795866 | 0.2377261 | 13.78424 |
| AD | 180 | 59.9 | 89.1 | 5.155709 | 6.271486 | 0.3767723 | 2e-07 | 0.6444444 | 0.2777778 | 13.06782 |
#with(pred_all %>% drop_na, cor(brainPAD, ChronoAge, method = "spearman"))
#ddd <- BlandAltmanLeh::bland.altman.plot(pred_all$AgePredMed, pred_all$ChronoAge, graph.sys = "ggplot2")
#ggplot_build(ddd)$data[[2]]The \(\lambda\) value for which the minimum is attained is 1279.7348611. The number of subjects for which the interval is not valid (that is, the case when the lower extreme is higher than the upper one) is 1.
func_coef_voxel_nonzero <- basis_mat %*% evec %*% model_med_coef
slices.plot(func_coef_voxel_nonzero@x, voxels = voxel_grid_nonzero_mask)func_coef_voxel_nonzero <- basis_mat %*% evec %*% model_lower_coef
slices.plot(func_coef_voxel_nonzero@x, voxels = voxel_grid_nonzero_mask)func_coef_voxel_nonzero <- basis_mat %*% evec %*% model_upper_coef
slices.plot(func_coef_voxel_nonzero@x, voxels = voxel_grid_nonzero_mask)data_forest_center <- pred_all%>%
arrange(. ,Dx, AgePredMed) %>%
group_by(Dx) %>%
mutate(id = row_number())
labs_PIplot <- pred_all %>%
group_by(Dx) %>%
summarise(N = n()) %>%
unite(., sep = " (N = ") %>%
simplify() %>%
paste0(.,")")
data_forest_center$Dx <- ordered(data_forest_center$Dx,
levels = c("Control", "MCI", "AD"),
labels = labs_PIplot)
data_forest_center[,1:3] <- data_forest_center[,1:3]- data_forest_center$ChronoAge
excesspoints <- filter(data_forest_center, OutPredInt == TRUE) %>%
select(AgePredLower, Dx, id)
excesspoints$x <- 30*sign(excesspoints$AgePredLower)
pred_interval <- list(data_forest_center = data_forest_center,
excesspoints = excesspoints,
tau = 1:3/4)
forest.rq.plot(pred.interval=pred_interval, mycol = c("#009E73", "gold3", "#D55E00"))ggplot(pred_all,
aes(x=abs(AgePredUpper - AgePredLower), color= fct_rev(Dx))) +
stat_bin(geom="step",
direction="vh",
bins = 10,
position = "identity", size = 1.2, alpha = 0.5, pad = TRUE
)+
labs(x = "Prediction intervals width (years)", y = "Frequency", color = "Diagnosis") +
scale_color_manual(name = "Diagnosis",
values = rev(c("#009E73", "gold3", "#D55E00")),
guide = guide_legend(reverse = TRUE,
override.aes = list(alpha = 1,
size = 0.9))) +
theme_classic(base_size = 18) +
geom_hline(yintercept = 0, colour = "white", size = 1.5) +
expand_limits(y = 0) +
scale_y_continuous(expand = c(0, 0))ggplot(pred_all,
aes(x=abs(AgePredUpper - AgePredLower), color=Dx)) +
geom_density(alpha=0.4) +
labs(x = "Prediction intervals width (years)", y = "Density", color = "Diagnosis") +
scale_color_manual(values=c("#009E73", "gold3", "#D55E00")) +
xlim(5,22) +
theme_classic(base_size = 18)+
expand_limits(y = 0) +
scale_y_continuous(expand = c(0, 0))# ggplot(pred_all, aes(x = pred_all$ChronoAge, fill = OverPrediction, color = OverPrediction)) +
# geom_histogram(position = "stack", bins = nclass.FD(pred_all$ChronoAge)) +
# labs(x = "Chronological age", y = "Frequency") +
# scale_fill_grey(start=1, end=0.3) +
# scale_color_grey(start=0, end=0) +
# theme_classic(base_size = 18) +
# theme(legend.position = "none")
#
# ggplot(pred_all,
# aes(x = pred_all$ChronoAge,
# fill = OverPrediction, color = OverPrediction
# )) +
# stat_density(aes(fill = OverPrediction, alpha = 0.5),
# position = "identity")
ggplot(data = pred_all, aes(x = ChronoAge,
color = OverPrediction)) +
stat_bin(geom="step", bins = nclass.FD(pred_all$ChronoAge),
position = "identity", size = 1.2,
alpha = 0.5,
direction = "vh", pad = TRUE)+
labs(x = "Chronological age", y = "Frequency", colour = "Overprediction") +
theme_classic(base_size = 18) +
theme(legend.key.size = unit(1, "cm")) +
geom_hline(yintercept = 0, colour = "white", size = 1.5) +
guides(colour = guide_legend(override.aes = list(alpha = 1)))+
expand_limits(y = 0) +
scale_y_continuous(expand = c(0, 0))library("ADNIMERGE", lib.loc="ADNIMERGE")
dxsum_myPTID <- dxsum[,c(3, 5, 8, 33)]
dxsum_myPTID$PTID_reduced <- paste0("S_", stringr::str_pad(dxsum_myPTID$RID, 4, pad = 0))
VISC <- unique(dxsum_myPTID$VISCODE)
VISC_periods <- startsWith(as.character(VISC), "m") %>% VISC[.] %>% substring(., 2) %>%
as.integer %>% sort %>% str_pad(., width = 2, pad = 0) %>%
paste0("m",.) #%>% ordered(., levels = ., labels = .)
#http://adni.loni.usc.edu/data-dictionary-search/?q=APOE4
VISC <- ordered(VISC, levels = c("bl", VISC_periods)) %>% na.omit %>% sort
VARS <- c("APOE4", "ADAS11", "ADAS13", "ADASQ4",
"MMSE", "DIGITSCOR", "TRABSCOR")
VARS <- adnimerge %>% select("APOE4", "ADAS11", "ADAS13", "ADASQ4",
"MMSE", "DIGITSCOR", "TRABSCOR"
#,starts_with("RAVLT"), -ends_with(".bl")
) %>% names()
VISC_selected <- c("bl", "m12", "m24", "m36", "m48") #labels = c("Baseline", "12", "24","36","48"))
validation_table <- cbind(expand.grid(VARS, VISC_selected), "brainPAD_corr" = NA,
"brainPAD_corr_pvalue" = NA, "OverPred_pvalue" = NA)
names(validation_table)[1:2] <- c("VARS", "VISC")
###APOE at baseline
pred_all$PTID_reduced <- unlist(strsplit(pred_all$PTID, "S_")) %>%
.[seq(2, 2*length(pred_all$PTID),by = 2)] %>%
substr(., start = 1, stop = 4) %>%
paste0("S_", .)
for(i in 1:length(VISC_selected)){
adnimerge_sub <- adnimerge %>%
filter(VISCODE == VISC_selected[i])
adnimerge_sub$PTID_reduced <- paste0("S_",
stringr::str_pad(adnimerge_sub$RID, 4, pad = 0))
adnimerge_sub <- left_join(pred_all,adnimerge_sub[,-c(1:9)], by= "PTID_reduced") %>%
dplyr::select(OverPrediction, brainPAD, VARS)
index1 <- (i - 1)*length(VARS) + 1
validation_table[index1:(index1+length(VARS)-1), 3:4] <-
sapply(select(adnimerge_sub, VARS),
function(x) as.numeric(cor.test(x, adnimerge_sub$brainPAD)[c("estimate","p.value")])
) %>% t()
validation_table[index1:(index1+length(VARS)-1), 5] <-
sapply(select(adnimerge_sub, VARS),
function(y)
t.test(y ~ adnimerge_sub$OverPrediction, alternative = "two.sided")$statistic
)
if(i == 1){
print(with(adnimerge_sub, chisq.test(OverPrediction, APOE4)))
anno_df <- compare_means(brainPAD ~ APOE4, data = adnimerge_sub,
method = "wilcox.test",
alternative = "greater",
p.adjust.method = "holm") %>%
mutate(y_pos = 40, p.adj = base::format.pval(p.adj))
p1 <- ggboxplot(adnimerge_sub, x = "APOE4", y = "brainPAD",
select = c("0","1","2"),
color = "APOE4",
palette = RColorBrewer::brewer.pal(9,"Set1")[c(7,4,9)]) +
geom_signif(data=anno_df,
aes(xmin = group1, xmax = group2, annotations = paste0("p = ", p.adj),
y_position = c(25, 34, 29)), manual= TRUE)+
ylim(c(NA, 35)) +
theme_classic2(base_size = 18) +
theme(legend.position = "none") +
labs(x = "ApoE4")
#print(p1)
#cat("-------------------------------------------------------------------\n")
}
}##
## Pearson's Chi-squared test
##
## data: OverPrediction and APOE4
## X-squared = 45.959, df = 2, p-value = 1.047e-10
print(p1)plot_brainPAD <- validation_table %>%
drop_na %>%
filter(VARS != "APOE4") %>%
ggplot(aes(factor(VISC), brainPAD_corr, color = VARS)) +
geom_point(aes(shape=
ifelse(brainPAD_corr_pvalue<0.05,
"Significant", "Not significant")), size = 2) +
scale_shape_manual(values=c(1, 19)) +
scale_color_brewer(palette = "Dark2") +
geom_line(aes(group = VARS)) +
#ylim(c(-0.25,0.25)) +
theme_classic(base_size = 14) +
#theme(legend.key.size = unit(0, "lines")) +
labs(x = "Visit code (months)", y = "Correlation with brainPAD",
color = "Variables", shape = "Significance (5%)")
plot_OverPred <- validation_table %>%
drop_na %>%
filter(VARS != "APOE4") %>%
ggplot(aes(factor(VISC), OverPred_pvalue, color = VARS)) +
geom_hline(yintercept = c(-1.96, 1.96), lty = 2) +
geom_point(aes(shape=ifelse(abs(OverPred_pvalue)<1.96,
"Not significant", "Significant")),
size = 2) +
scale_shape_manual(values=c(1, 19)) +
scale_color_brewer(palette = "Dark2") +
geom_line(aes(group = VARS)) +
#ylim(c(0,1)) +
theme_classic(base_size = 14) +
labs(x = "Visit code (months)", y = "t-statistic (overprediction)",
color = "Variables", shape = "Significance (5%)")
legend_brainPAD <- get_legend(plot_brainPAD)
prow <- cowplot::plot_grid(plot_brainPAD +
theme(legend.position="none",
axis.text.x=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x=element_blank(),
plot.margin=margin(c(0.1, 0, 0, 0), unit="cm")),
NULL,
plot_OverPred + theme(legend.position="none",
plot.margin=margin(c(0, 0, 0, 0), unit="pt")),
align = 'hv', ncol = 1,
rel_heights = c(1,-0.1,1),
labels=c("A",NA,"B"), label_x = 0.022)
#pdf("validationplot.pdf",width=5,height=5,paper='special')
cowplot::plot_grid(prow, NULL, legend_brainPAD, nrow = 1, rel_widths = c(0.8,0.1,0.4), axis = "b")#dev.off()